{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模拟两个正态分布的参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numpy import *\n",
    "import numpy as np\n",
    "import random\n",
    "import copy\n",
    "import matplotlib.pyplot as plt\n",
    "import math\n",
    "import matplotlib.mlab as mlab"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "均值不同的样本"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "EPS = 0.0001\n",
    "def generate_data():\t\n",
    "\tmu1 = 2\n",
    "\tmu2 = 6\n",
    "\tsigma1 = 0.1\n",
    "\tsigma2 = 0.5\n",
    "\talpha1 = 0.4\n",
    "\talpha2 = 0.6\n",
    "\tN = 5000\n",
    "\tN1 = int(alpha1 * N)\n",
    "\tX = mat(zeros((N,1)))\n",
    "\tfor i in range(N1):\n",
    "\t\tu1 = random.uniform(-1,1)\n",
    "\t\tX[i] = u1 * sigma1 + mu1\n",
    "\tfor i in range(N-N1):\n",
    "\t\tu1 = random.uniform(-1,1)\n",
    "\t\tX[i+N1] = u1 * sigma2 + mu2\n",
    "\treturn X"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "EM算法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def GMM(X):\n",
    "\tk = 2\n",
    "\tN = len(X)\n",
    "\tmu = np.random.rand(k,1)\n",
    "\tprint (str('init mu='))\n",
    "\tprint (mu)\n",
    "\tPosterior = mat(zeros((N,k)))\t\n",
    "\tsigma = np.random.rand(k,1)\n",
    "\tprint (str('init sigma='))\n",
    "\tprint (sigma)\n",
    "\talpha = np.random.rand(k,1)\n",
    "\tdominator = 0\n",
    "\tnumerator = 0\n",
    "\tprint (str('init alpha='))\n",
    "\tprint (alpha)\n",
    "\t#先求后验概率\n",
    "\t#print (sigma)\n",
    "\tfor it in range(1000):\n",
    "\t\tfor i in range(N):\n",
    "\t\t\tdominator = 0\n",
    "\t\t\tfor j in range(k):\n",
    "\t\t\t\tdominator = dominator + np.exp(-1.0/(2.0*sigma[j]) * (X[i] - mu[j])**2)\n",
    "\t\t\t\t#print -1.0/(2.0*sigma[j]),(X[i] - mu[j])**2,-1.0/(2.0*sigma[j]) * (X[i] - mu[j])**2,np.exp(-1.0/(2.0*sigma[j]) * (X[i] - mu[j])**2)\n",
    "\t\t\t\t#return\n",
    "\t\t\tfor j in range(k):\n",
    "\t\t\t\tnumerator = np.exp(-1.0/(2.0*sigma[j]) * (X[i] - mu[j])**2)\n",
    "\t\t\t\tPosterior[i,j] = numerator/dominator\t\t\t\n",
    "\t\toldmu = copy.deepcopy(mu)\n",
    "\t\toldalpha = copy.deepcopy(alpha)\n",
    "\t\toldsigma = copy.deepcopy(sigma)\n",
    "\t\t#最大化\t\n",
    "\t\tfor j in range(k):\n",
    "\t\t\tnumerator = 0\n",
    "\t\t\tdominator = 0\n",
    "\t\t\tfor i in range(N):\n",
    "\t\t\t\tnumerator = numerator + Posterior[i,j] * X[i]\n",
    "\t\t\t\tdominator = dominator + Posterior[i,j]\n",
    "\t\t\tmu[j] = numerator/dominator\n",
    "\t\t\talpha[j] = dominator/N\n",
    "\t\t\ttmp = 0\n",
    "\t\t\tfor i in range(N):\n",
    "\t\t\t\ttmp = tmp + Posterior[i,j] * (X[i] - mu[j])**2\n",
    "\t\t\t\t#print tmp,Posterior[i,j],(X[i] - mu[j])**2 \n",
    "\t\t\tsigma[j] = tmp/dominator\n",
    "\t\t\t#print (tmp)\n",
    "\t\t\t#print (dominator)\n",
    "\t\t\t#print (sigma[j])\n",
    "\t\tif ((abs(mu - oldmu)).sum() < EPS) and \\\n",
    "\t\t\t((abs(alpha - oldalpha)).sum() < EPS) and \\\n",
    "\t\t\t((abs(sigma - oldsigma)).sum() < EPS):\n",
    "\t\t\t\tprint (str('final mu=')) \n",
    "\t\t\t\tprint (str(mu))\n",
    "\t\t\t\tprint (str('final sigma='))\n",
    "\t\t\t\tprint (str(sigma))\n",
    "\t\t\t\tprint (str('final alpha='))\n",
    "\t\t\t\tprint (str(alpha))\n",
    "\t\t\t\tprint (it)\n",
    "\t\t\t\tbreak"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Main启动"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/python3/lib/python3.6/site-packages/ipykernel_launcher.py:2: MatplotlibDeprecationWarning: \n",
      "The 'normed' kwarg was deprecated in Matplotlib 2.1 and will be removed in 3.1. Use 'density' instead.\n",
      "  \n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARtUlEQVR4nO3df6xfd13H8efLbgP5IRR7xaWt69RFmfzY5k0H2QJDWelEV40kdvJjEkgTsiEo0WyYbDr+AUn8PRjNqEOFTR1MqxRGI+hUHPZ2jI11DGqZrs1MLxTGz2zpePvH98x8ubu339Pe7+13/dznI/nmnvP5cb7ve5K+7un5nvM9qSokSe36vkkXIElaWga9JDXOoJekxhn0ktQ4g16SGnfSpAuYz6pVq2rdunWTLkOSThi7d+/+clVNzdf3hAz6devWMTMzM+kyJOmEkeS/F+rz1I0kNc6gl6TGGfSS1DiDXpIaZ9BLUuNGBn2StUk+mWRPknuSvHmeMUnyJ0n2JrkryTlDfZcm+WL3unTcv4Ak6cj6XF55GHhrVd2R5OnA7iQ7q2rP0JiLgDO617nAe4BzkzwLuBqYBqqbu72qvjrW30KStKCRR/RV9WBV3dEtfwO4F1g9Z9gm4C9q4HbgmUlOBV4O7KyqQ1247wQ2jvU3kCQd0VGdo0+yDjgb+PScrtXAA0Pr+7u2hdrn2/aWJDNJZmZnZ4+mLEnSEfS+MzbJ04APAW+pqq+Pu5Cq2gpsBZienj7mp6Gsu+Ijvcbd/45XHOtbSNIJpdcRfZKTGYT8B6rqw/MMOQCsHVpf07Ut1C5JOk76XHUT4H3AvVX1BwsM2w68trv65oXAQ1X1IHArsCHJyiQrgQ1dmyTpOOlz6uY84DXA3Unu7NreBvwIQFVdB+wAfg7YC3wbeF3XdyjJ24Fd3bxrqurQ+MqXJI0yMuir6t+AjBhTwGUL9G0Dth1TdZKkRfPOWElqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcyCdMJdkG/DxwsKqeO0//bwGvGtrec4Cp7jGC9wPfAB4FDlfV9LgKlyT10+eI/gZg40KdVfWuqjqrqs4CrgT+Zc5zYV/a9RvykjQBI4O+qm4D+j7Q+xLgxkVVJEkaq7Gdo0/yFAZH/h8aai7g40l2J9kyYv6WJDNJZmZnZ8dVliQte+P8MPYXgH+fc9rm/Ko6B7gIuCzJixeaXFVbq2q6qqanpqbGWJYkLW/jDPrNzDltU1UHup8HgVuA9WN8P0lSD2MJ+iTPAF4C/P1Q21OTPP2xZWAD8LlxvJ8kqb8+l1feCFwArEqyH7gaOBmgqq7rhv0S8PGq+tbQ1GcDtyR57H0+WFUfG1/pkqQ+RgZ9VV3SY8wNDC7DHG7bB7zgWAuTJI2Hd8ZKUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS40YGfZJtSQ4mmfd5r0kuSPJQkju711VDfRuT3Jdkb5Irxlm4JKmfPkf0NwAbR4z516o6q3tdA5BkBXAtcBFwJnBJkjMXU6wk6eiNDPqqug04dAzbXg/srap9VfUIcBOw6Ri2I0lahHGdo39Rks8m+WiSn+raVgMPDI3Z37XNK8mWJDNJZmZnZ8dUliRpHEF/B3BaVb0A+FPg745lI1W1taqmq2p6ampqDGVJkmAMQV9VX6+qb3bLO4CTk6wCDgBrh4au6dokScfRooM+yQ8nSbe8vtvmV4BdwBlJTk9yCrAZ2L7Y95MkHZ2TRg1IciNwAbAqyX7gauBkgKq6Dngl8MYkh4HvAJurqoDDSS4HbgVWANuq6p4l+S0kSQsaGfRVdcmI/j8D/myBvh3AjmMrTZI0Dt4ZK0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0bGfRJtiU5mORzC/S/KsldSe5O8qkkLxjqu79rvzPJzDgLlyT10+eI/gZg4xH6vwS8pKqeB7wd2Dqn/6VVdVZVTR9biZKkxejzzNjbkqw7Qv+nhlZvB9YsvixJ0riM+xz964GPDq0X8PEku5NsOdLEJFuSzCSZmZ2dHXNZkrR8jTyi7yvJSxkE/flDzedX1YEkPwTsTPL5qrptvvlVtZXutM/09HSNqy5JWu7GckSf5PnA9cCmqvrKY+1VdaD7eRC4BVg/jveTJPW36KBP8iPAh4HXVNUXhtqfmuTpjy0DG4B5r9yRJC2dkaduktwIXACsSrIfuBo4GaCqrgOuAn4QeHcSgMPdFTbPBm7p2k4CPlhVH1uC30GSdAR9rrq5ZET/G4A3zNO+D3jB42dIko4n74yVpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxvUK+iTbkhxMMu8zXzPwJ0n2JrkryTlDfZcm+WL3unRchUuS+ul7RH8DsPEI/RcBZ3SvLcB7AJI8i8EzZs8F1gNXJ1l5rMVKko5er6CvqtuAQ0cYsgn4ixq4HXhmklOBlwM7q+pQVX0V2MmR/2BIksZsXOfoVwMPDK3v79oWan+cJFuSzCSZmZ2dHVNZkqQnzIexVbW1qqaranpqamrS5UhSM8YV9AeAtUPra7q2hdolScfJuIJ+O/Da7uqbFwIPVdWDwK3AhiQruw9hN3RtkqTj5KQ+g5LcCFwArEqyn8GVNCcDVNV1wA7g54C9wLeB13V9h5K8HdjVbeqaqjrSh7qSpDHrFfRVdcmI/gIuW6BvG7Dt6EuTJI3DE+bDWEnS0jDoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mN6xX0STYmuS/J3iRXzNP/h0nu7F5fSPK1ob5Hh/q2j7N4SdJoIx8lmGQFcC1wIbAf2JVke1XteWxMVf3G0Pg3AWcPbeI7VXXW+EqWJB2NPkf064G9VbWvqh4BbgI2HWH8JcCN4yhOkrR4fYJ+NfDA0Pr+ru1xkpwGnA58Yqj5yUlmktye5BcXepMkW7pxM7Ozsz3KkiT1Me4PYzcDN1fVo0Ntp1XVNPCrwB8l+bH5JlbV1qqarqrpqampMZclSctXn6A/AKwdWl/Ttc1nM3NO21TVge7nPuCf+d7z95KkJdYn6HcBZyQ5PckpDML8cVfPJPlJYCXwH0NtK5M8qVteBZwH7Jk7V5K0dEZedVNVh5NcDtwKrAC2VdU9Sa4BZqrqsdDfDNxUVTU0/TnAe5N8l8EflXcMX60jSVp6I4MeoKp2ADvmtF01Z/1355n3KeB5i6hPkrRI3hkrSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGtfr8kpJOt7WXfGRXuPuf8crlriSE59H9JLUuGV7RO/RgrT8TOrf/aTzZtkGvSQtVt8AnzRP3UhS4zyil3RCO1GOqifJI3pJapxH9JI0R2v/S/CIXpIaZ9BLUuMMeklqXK+gT7IxyX1J9ia5Yp7+X0sym+TO7vWGob5Lk3yxe106zuIlSaON/DA2yQrgWuBCYD+wK8n2eZ79+tdVdfmcuc8CrgamgQJ2d3O/OpbqJUkj9TmiXw/srap9VfUIcBOwqef2Xw7srKpDXbjvBDYeW6mSpGPRJ+hXAw8Mre/v2ub65SR3Jbk5ydqjnEuSLUlmkszMzs72KEuS1Me4Poz9B2BdVT2fwVH7+492A1W1taqmq2p6ampqTGVJkvoE/QFg7dD6mq7t/1XVV6rq4W71euCn+86VJC2tPkG/CzgjyelJTgE2A9uHByQ5dWj1YuDebvlWYEOSlUlWAhu6NknScTLyqpuqOpzkcgYBvQLYVlX3JLkGmKmq7cCvJ7kYOAwcAn6tm3soydsZ/LEAuKaqDi3B7yFJWkCv77qpqh3AjjltVw0tXwlcucDcbcC2RdQoSVoE74yVpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcb2+j16SxmXdFR+ZdAnLjkf0ktS4XkGfZGOS+5LsTXLFPP2/mWRPkruS/FOS04b6Hk1yZ/faPneuJGlpjTx1k2QFcC1wIbAf2JVke1XtGRr2GWC6qr6d5I3A7wO/0vV9p6rOGnPdkqSe+hzRrwf2VtW+qnoEuAnYNDygqj5ZVd/uVm8H1oy3TEnSseoT9KuBB4bW93dtC3k98NGh9ScnmUlye5JfXGhSki3duJnZ2dkeZUmS+hjrVTdJXg1MAy8Zaj6tqg4k+VHgE0nurqr/mju3qrYCWwGmp6drnHVJ0nLW54j+ALB2aH1N1/Y9krwM+B3g4qp6+LH2qjrQ/dwH/DNw9iLqlSQdpT5Bvws4I8npSU4BNgPfc/VMkrOB9zII+YND7SuTPKlbXgWcBwx/iCtJWmIjT91U1eEklwO3AiuAbVV1T5JrgJmq2g68C3ga8LdJAP6nqi4GngO8N8l3GfxRececq3UkSUus1zn6qtoB7JjTdtXQ8ssWmPcp4HmLKVCStDjeGStJjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mN6xX0STYmuS/J3iRXzNP/pCR/3fV/Osm6ob4ru/b7krx8fKVLkvoYGfRJVgDXAhcBZwKXJDlzzrDXA1+tqh8H/hB4Zzf3TAYPE/8pYCPw7m57kqTjpM8R/Xpgb1Xtq6pHgJuATXPGbALe3y3fDPxsBk8J3wTcVFUPV9WXgL3d9iRJx0mfh4OvBh4YWt8PnLvQmKo6nOQh4Ae79tvnzF0935sk2QJs6Va/meS+HrUdjVXAl492Ut455iqeGI5pXzTI/TDgfhiY+H5YZN6ctlBHn6A/LqpqK7B1qbafZKaqppdq+ycS98WA+2HA/TDQ8n7oc+rmALB2aH1N1zbvmCQnAc8AvtJzriRpCfUJ+l3AGUlOT3IKgw9Xt88Zsx24tFt+JfCJqqqufXN3Vc7pwBnAf46ndElSHyNP3XTn3C8HbgVWANuq6p4k1wAzVbUdeB/wl0n2AocY/DGgG/c3wB7gMHBZVT26RL/LKEt2WugE5L4YcD8MuB8Gmt0PGRx4S5Ja5Z2xktQ4g16SGtd80CdZm+STSfYkuSfJmydd0yQkeXKS/0zy2W4//N6ka5qkJCuSfCbJP066lklKcn+Su5PcmWRm0vVMSpJnJrk5yeeT3JvkRZOuaZyeMNfRL6HDwFur6o4kTwd2J9lZVXsmXdhx9jDwM1X1zSQnA/+W5KNVdfuoiY16M3Av8AOTLuQJ4KVVtdxvmPpj4GNV9cru6sKnTLqgcWr+iL6qHqyqO7rlbzD4xz3v3bktq4Fvdqsnd69l+Ul8kjXAK4DrJ12LJi/JM4AXM7h6kKp6pKq+Ntmqxqv5oB/Wfavm2cCnJ1vJZHSnK+4EDgI7q2pZ7gfgj4DfBr476UKeAAr4eJLd3deQLEenA7PAn3en865P8tRJFzVOyybokzwN+BDwlqr6+qTrmYSqerSqzmJwh/L6JM+ddE3HW5KfBw5W1e5J1/IEcX5VncPg22kvS/LiSRc0AScB5wDvqaqzgW8Bj/s69hPZsgj67pz0h4APVNWHJ13PpHX/Lf0kg6+OXm7OAy5Ocj+Db2L9mSR/NdmSJqeqDnQ/DwK3sDy/XXY/sH/of7g3Mwj+ZjQf9N3XJb8PuLeq/mDS9UxKkqkkz+yWvx+4EPj8ZKs6/qrqyqpaU1XrGNzB/YmqevWEy5qIJE/tLlCgO1WxAfjcZKs6/qrqf4EHkvxE1/SzDO7mb8ZyuOrmPOA1wN3d+WmAt1XVjgnWNAmnAu/vHvzyfcDfVNWyvrRQPBu4ZXAsxEnAB6vqY5MtaWLeBHygu+JmH/C6CdczVn4FgiQ1rvlTN5K03Bn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXH/B3maIwye8LeGAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init mu=\n",
      "[[0.49885216]\n",
      " [0.22641175]]\n",
      "init sigma=\n",
      "[[0.62373759]\n",
      " [0.69778893]]\n",
      "init alpha=\n",
      "[[0.71711574]\n",
      " [0.972252  ]]\n",
      "final mu=\n",
      "[[3.66766419]\n",
      " [6.04578138]]\n",
      "final sigma=\n",
      "[[3.8539475 ]\n",
      " [0.06339081]]\n",
      "final alpha=\n",
      "[[0.69215764]\n",
      " [0.30784236]]\n",
      "24\n"
     ]
    }
   ],
   "source": [
    "X = generate_data()\n",
    "plt.hist(X, 30, normed=True)\n",
    "plt.show()\n",
    "GMM(X)\t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
